#!/usr/bin/python
"""
This program prints the CDS features that lie between two given coordinates
and also prints the class that each feature's top hit belongs to.

Usage: python dissertation_CheckGenes.py GKIL.v6.gbf GKIL.v6.tophits_class.txt 10000 20000
"""

import sys
import re
from Bio import SeqIO

def parse_classes(lines):
    """Build a locus-tag -> classification mapping from tab-separated lines.

    Each usable line has the locus tag in the first column and its
    classification in the second; surrounding whitespace (including the line
    ending) is stripped from the classification.  Lines with fewer than two
    tab-separated columns (e.g. a trailing blank line) are skipped instead of
    raising IndexError.  If a locus tag occurs more than once, the last
    occurrence wins.

    lines -- any iterable of strings, such as an open text file.
    """
    classes = {}
    for line in lines:
        fields = line.split('\t')
        if len(fields) < 2:
            continue
        classes[fields[0]] = fields[1].strip()
    return classes


def cds_rows(features, classdict, start, stop):
    """Yield one formatted output row per CDS lying strictly inside a window.

    A feature is reported when its type is 'CDS' and both of its end points
    satisfy start < coordinate < stop.  Each row is
    '<locus_tag>\\t<class padded to 22 columns>\\t<product>'.  The class is
    looked up in classdict by locus tag and is empty when the tag is absent.
    A missing 'locus_tag' or 'product' qualifier is rendered as an empty
    string rather than aborting the listing with KeyError.

    features  -- iterable of objects exposing .type, .location.start,
                 .location.end and a .qualifiers mapping of lists.
    classdict -- mapping of locus tag to classification string.
    start, stop -- integer window bounds (both exclusive).
    """
    for feat in features:
        if feat.type != 'CDS':
            continue
        # Use the public start/end accessors: the private _start/_end
        # attributes are not available on joined (compound) locations.
        feat_start = feat.location.start
        feat_end = feat.location.end
        if not (start < feat_start < stop and start < feat_end < stop):
            continue
        locustag = feat.qualifiers.get('locus_tag', [''])[0]
        product = feat.qualifiers.get('product', [''])[0]
        classif = classdict.get(locustag, "")
        yield '{0}\t{1:22}\t{2}'.format(locustag, classif, product)


def main(argv):
    """Run the command-line tool.

    argv -- full argument vector: script name, GenBank file, class file,
            start coordinate, stop coordinate.  Extra arguments are ignored.

    Exits with a usage message when arguments are missing.  Raises ValueError
    when start or stop is not an integer.
    """
    if len(argv) < 5:
        sys.exit("Usage: python {0} <genbank file> <class file> <start> <stop>"
                 .format(argv[0] if argv else "dissertation_CheckGenes.py"))

    # Validate the cheap arguments before parsing the (large) GenBank file.
    start = int(argv[3])
    stop = int(argv[4])

    # The 'U' flag was removed in Python 3.11; Python 3 text mode already
    # applies universal newlines, so only Python 2 still needs it.
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    # The context manager closes the file even if parsing raises.
    with open(argv[2], mode) as classfile:
        classdict = parse_classes(classfile)

    gb = SeqIO.read(argv[1], "genbank")
    for row in cds_rows(gb.features, classdict, start, stop):
        print(row)


if __name__ == "__main__":
    main(sys.argv)
    
